Radar Chart

Age was standardized into months and categorized into self-defined bins.

Statistics were calculated to relate each data attribute and adoption outcome type:

Statistics were normalized from 0-1 and plotted on radar chart.

optipaw = read.csv("/Users/PengOlivia/Desktop/OptiPaw/optipaw_FINAL.csv")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(fmsb)
library(vcd)
## Warning: package 'vcd' was built under R version 4.3.3
## Loading required package: grid
## 
## Attaching package: 'vcd'
## 
## The following object is masked from 'package:fmsb':
## 
##     oddsratio
library(ltm)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: msm
## Warning: package 'msm' was built under R version 4.3.3
## Loading required package: polycor
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:ltm':
## 
##     factor.scores
## 
## The following object is masked from 'package:polycor':
## 
##     polyserial
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(lubridate)

library(tidyr)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:MASS':
## 
##     select
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
# Clean the dataset by removing rows with NA values
optipaw_clean <- na.omit(optipaw)
print(unique(optipaw_clean$Outcome.Type))
##  [1] "Return to Owner"  "Transfer"         "Adoption"         "Euthanasia"      
##  [5] "Died"             "Rto-Adopt"        "Disposal"         "Missing"         
##  [9] "Stolen"           "Relocate"         "Lost"             "Foster"          
## [13] "Reclaimed"        "Escaped"          "Released To Wild"
# Ensure consistent case for categorical column 'Animal.Type'
optipaw_clean$Animal.Type <- tolower(optipaw_clean$Animal.Type)

# Group age into meaningful categories (e.g., 0-1 year, 1-5 years, etc.)
optipaw_clean$Age_in_months <- optipaw_clean$Age * 12 # Convert Age to months
age_groups <- cut(optipaw_clean$Age, 
                  breaks = c(0, 12, 60, 120, Inf), 
                  labels = c("0-1 year", "1-5 years", "5-10 years", "10+ years"))

# Remove rows with NA values for Age
optipaw_clean <- optipaw_clean[!is.na(optipaw_clean$Age), ]

# Check if any NA values remain in Age
sum(is.na(optipaw_clean$Age))
## [1] 0
# Define a function to create radar charts for each outcome type
radarchart_outcome_type <- function(optipaw_clean, outcome_type) {
  
  # Convert relevant columns to appropriate data types
  optipaw_clean$Animal.Type <- as.factor(optipaw_clean$Animal.Type)
  optipaw_clean$Breed <- as.factor(optipaw_clean$Color)  # Assuming 'Color' is a proxy for 'Breed'
  optipaw_clean$Intake.Type <- as.factor(optipaw_clean$Intake.Type)
  optipaw_clean$Sex <- as.factor(optipaw_clean$Sex)
  optipaw_clean$Outcome.Type <- as.factor(optipaw_clean$Outcome.Type)
  
  # Calculate Cramér's V for categorical variables
  cramers_v_animal_type <- assocstats(table(optipaw_clean$Animal.Type, optipaw_clean$Outcome.Type))$cramer
  cramers_v_breed <- assocstats(table(optipaw_clean$Breed, optipaw_clean$Outcome.Type))$cramer
  cramers_v_intake_reason <- assocstats(table(optipaw_clean$Intake.Type, optipaw_clean$Outcome.Type))$cramer
  cramers_v_sex <- assocstats(table(optipaw_clean$Sex, optipaw_clean$Outcome.Type))$cramer
  
  # Create a binary variable for the given outcome_type vs. other outcomes
  binary_outcome_col <- paste0(outcome_type, "_Binary")
  optipaw_clean[[binary_outcome_col]] <- ifelse(optipaw_clean$Outcome.Type == outcome_type, 1, 0)
  
  # Calculate Point Biserial Correlation for Age and the selected outcome type
  age_correlation <- biserial.cor(optipaw_clean$Age_in_months, optipaw_clean[[binary_outcome_col]])
  
  # Normalize the correlation factors to (0,1)
  max_correlation <- max(cramers_v_animal_type, cramers_v_breed, cramers_v_intake_reason, cramers_v_sex, abs(age_correlation))
  
  # Normalize the values
  norm_animal_type <- cramers_v_animal_type / max_correlation
  norm_breed <- cramers_v_breed / max_correlation
  norm_intake_reason <- cramers_v_intake_reason / max_correlation
  norm_sex <- cramers_v_sex / max_correlation
  norm_age <- abs(age_correlation) / max_correlation
  
  # Prepare data for the radar chart
  data <- data.frame(
    Animal_Type = c(1, 0),
    Breed = c(1, 0),
    Age = c(1, 0),
    Intake_Reason = c(1, 0),
    Sex = c(1, 0)
  )
  
  # Add the normalized correlation values for each variable
  data <- rbind(data, c(
    norm_animal_type, 
    norm_breed,       
    norm_age,         
    norm_intake_reason, 
    norm_sex
  ))
  
  # Create radar chart
  radarchart(data, axistype = 1, 
             pcol = rgb(0.2, 0.5, 0.5, 0.9), 
             pfcol = rgb(0.2, 0.5, 0.5, 0.5), 
             plwd = 4, 
             cglcol = "grey", 
             cglty = 1, 
             axislabcol = "grey", 
             caxislabels = seq(0, 1, 0.2),  
             cglwd = 0.8,
             vlcex = 0.8) 
  
  # Add title
  title(paste(outcome_type))
}

# Set up grid for radar charts
par(mfrow = c(3, 3), mar = c(1, 1, 1, 1))

# Generate radar charts for different outcome types
radar_adoption <- radarchart_outcome_type(optipaw_clean, "Adoption")
radar_euthanasia <- radarchart_outcome_type(optipaw_clean, "Euthanasia")
radar_transfer <- radarchart_outcome_type(optipaw_clean, "Transfer")
radar_died <- radarchart_outcome_type(optipaw_clean, "Died")
radar_missing <- radarchart_outcome_type(optipaw_clean, "Missing")
radar_disposal <- radarchart_outcome_type(optipaw_clean, "Disposal")
radar_lost <- radarchart_outcome_type(optipaw_clean, "Lost")
radar_stolen <- radarchart_outcome_type(optipaw_clean, "Stolen")
radar_relocate <- radarchart_outcome_type(optipaw_clean, "Relocate")

radar_rto <- radarchart_outcome_type(optipaw_clean, "Return To Owner")
## [DATA NOT ENOUGH] at 3
## NaN
##  [DATA NOT ENOUGH] at 3
## NaN
##  [DATA NOT ENOUGH] at 3
## NaN
##  [DATA NOT ENOUGH] at 3
## NaN
##  [DATA NOT ENOUGH] at 3
## NaN
radar_rto_adoption <- radarchart_outcome_type(optipaw_clean, "Rto-Adopt")  # Pet returned to owner, then adopted

# Reset the plot layout
par(mfrow = c(1, 1))

Heatmap

# Step 1: Count frequency of unique combinations of Intake.Type and Outcome.Type
counts <- optipaw_clean %>%
  group_by(Intake.Type, Outcome.Type) %>%
  summarise(count = n(), .groups = 'drop')

# Step 2: Calculate total number of entries (sum of all unique combinations)
total_entries <- sum(counts$count)  # Sum of all occurrences, not just unique combinations

# Step 3: Convert frequency into percentage (count / total_entries) * 100
counts <- counts %>%
  mutate(percentage = (count / total_entries) * 100)  # Correct percentage calculation

# Step 4: Pivot the data for the heatmap, using only relevant columns (excluding 'count')
heatmap_data <- counts %>%
  select(-count) %>%  # Exclude the 'count' column to prevent it from being included in the heatmap
  pivot_wider(names_from = Intake.Type, values_from = percentage, values_fill = 0)

# Step 5: Create interactive heatmap using plotly
fig <- plot_ly(
  z = as.matrix(heatmap_data[,-1]),  # The percentages for each combination (excluding the first column Outcome.Type)
  x = colnames(heatmap_data)[-1],    # Intake types (from pivot_wider)
  y = heatmap_data$Outcome.Type,      # Outcome types
  type = "heatmap", 
  colorscale = "YlGnBu",  # Use a green-blue color scale
  reversescale = TRUE,    # Reverse the color scale so larger values are darker
  colorbar = list(title = "Percentage (%)")  # Title for color legend (now based on percentage)
)

# Step 6: Update layout for better visualization
fig <- fig %>%
  layout(
    title = "Percentage of Unique Combinations between Intake Type and Outcome Type",
    xaxis = list(title = "Intake Type"),
    yaxis = list(title = "Outcome Type")
  )

# Display the heatmap
fig

Time-Series Stacked Bar Plot

# Step 1: Convert the Outcome.Date to a date format and extract the month
optipaw <- optipaw %>%
  mutate(Outcome.Date = ymd_hms(Outcome.Date),  # Ensure Outcome.Date is parsed as a datetime
         month = month(Outcome.Date, label = TRUE))  # Extracts the month as a factor with labels
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Outcome.Date = ymd_hms(Outcome.Date)`.
## Caused by warning:
## !  8252 failed to parse.
# Step 2: Count frequency of each Outcome.Type per month
monthly_counts <- optipaw %>%
  group_by(month, Outcome.Type) %>%
  summarise(frequency = n(), .groups = 'drop') %>%
  complete(month, Outcome.Type, fill = list(frequency = 0))  # Fill missing combinations with 0

# Step 3: Calculate the total frequency per month
monthly_totals <- monthly_counts %>%
  group_by(month) %>%
  summarise(total_frequency = sum(frequency))

# Step 4: Merge total frequency with the original data and convert to percentages
monthly_counts <- monthly_counts %>%
  left_join(monthly_totals, by = "month") %>%
  mutate(percentage = (frequency / total_frequency) * 100)  # Convert to percentage

# Step 5: Reorder stacking order of outcome types to include all new values
monthly_counts$Outcome.Type <- factor(monthly_counts$Outcome.Type, levels = c(
  'Adoption',       
  'Rto-Adopt', 
  'Return to Owner', 
  'Relocate', 
  'Transfer', 
  'Euthanasia', 
  'Missing', 
  'Disposal', 
  'Stolen',
  'Lost', 
  'Died',
  'Reclaimed',
  'Released To Wild',
  'Escaped',
  'Foster'             
))

# Step 6: Define the colors for the new outcomes
colors <- c('Adoption' = 'darkblue', 
            'Rto-Adopt' = 'turquoise',
            'Return to Owner' = 'green',
            'Relocate' = 'lightgreen',
            'Transfer' = 'lightblue',
            'Euthanasia' = 'yellow', 
            'Missing' = 'orange', 
            'Disposal' = 'pink',
            'Stolen' = 'red',
            'Lost' = 'darkred', 
            'Died' = 'darkgray',
            'Reclaimed' = 'purple',
            'Released To Wild' = 'brown',
            'Escaped' = 'lightpink',
            'Foster' = 'lightyellow')

# Step 7: Create interactive stacked bar plot using plotly, now using percentage
fig <- plot_ly(
  data = monthly_counts, 
  x = ~month, 
  y = ~percentage,  # Use percentage instead of frequency
  color = ~Outcome.Type, 
  type = "bar", 
  colors = colors  # Apply the color scheme for the updated Outcome Types
) %>%
  layout(
    title = "Percentage of Outcome Types by Month",
    xaxis = list(title = "Months"),
    yaxis = list(title = "Percentage of Outcome Types", range = c(0, 100)),  # Set y-axis range to 0-100%
    barmode = 'stack'  # Stacks the bars
  )

fig
## Warning: Ignoring 15 observations

Comparative Box plot

For every outcome type, make a box plot on the distribution for time length in shelter.

# Convert 'Intake.Date' and 'Outcome.Date' to date format
optipaw$Intake.Date <- ymd_hms(optipaw$Intake.Date)
optipaw$Outcome.Date <- ymd_hms(optipaw$Outcome.Date)
## Warning: 1164 failed to parse.
# Calculate time in shelter in months
optipaw <- optipaw %>%
  mutate(Time.In.Shelter = time_length(interval(Intake.Date, Outcome.Date), "months")) %>%
  filter(Time.In.Shelter > 0)  # Remove any rows with negative or zero months

# Create the box plot using plotly
fig <- plot_ly(optipaw, y = ~Outcome.Type, x = ~Time.In.Shelter, type = "box", boxpoints = FALSE)
fig <- fig %>% layout(title = "Distribution of Time in Shelter (Months) by Outcome Type",
                      xaxis = list(title = "Length of Time in Shelter (Months)"),
                      yaxis = list(title = "Outcome Type"))
# Show the plot
fig